{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "第三周作业：预测房子受欢迎程度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导入工具包\n",
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.72\n"
     ]
    }
   ],
   "source": [
    "#读入数据\n",
    "print(xgb.__version__)\n",
    "file = open('C:/Users/chenxi/Desktop/disanzhouzuoye/第三周作业/code/data/RentListingInquries_FE_train.csv')\n",
    "train = pd.read_csv(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 49352 entries, 0 to 49351\n",
      "Columns: 228 entries, bathrooms to interest_level\n",
      "dtypes: float64(9), int64(219)\n",
      "memory usage: 85.8 MB\n"
     ]
    }
   ],
   "source": [
    "#数据探索\n",
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>...</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.5</td>\n",
       "      <td>3</td>\n",
       "      <td>3000</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>750.000000</td>\n",
       "      <td>-1.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>24</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>5465</td>\n",
       "      <td>2732.5</td>\n",
       "      <td>1821.666667</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.0</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>17</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3275</td>\n",
       "      <td>1637.5</td>\n",
       "      <td>1637.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>18</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3350</td>\n",
       "      <td>1675.0</td>\n",
       "      <td>670.000000</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 228 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  price  price_bathrooms  price_bedrooms  room_diff  \\\n",
       "0        1.5         3   3000           1200.0      750.000000       -1.5   \n",
       "1        1.0         2   5465           2732.5     1821.666667       -1.0   \n",
       "2        1.0         1   2850           1425.0     1425.000000        0.0   \n",
       "3        1.0         1   3275           1637.5     1637.500000        0.0   \n",
       "4        1.0         4   3350           1675.0      670.000000       -3.0   \n",
       "\n",
       "   room_num  Year  Month  Day       ...        walk  walls  war  washer  \\\n",
       "0       4.5  2016      6   24       ...           0      0    0       0   \n",
       "1       3.0  2016      6   12       ...           0      0    0       0   \n",
       "2       2.0  2016      4   17       ...           0      0    0       0   \n",
       "3       2.0  2016      4   18       ...           0      0    0       0   \n",
       "4       5.0  2016      4   28       ...           0      0    1       0   \n",
       "\n",
       "   water  wheelchair  wifi  windows  work  interest_level  \n",
       "0      0           0     0        0     0               1  \n",
       "1      0           0     0        0     0               2  \n",
       "2      0           0     0        0     0               0  \n",
       "3      0           0     0        0     0               2  \n",
       "4      0           0     0        0     0               2  \n",
       "\n",
       "[5 rows x 228 columns]"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>...</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>49352.00000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>4.935200e+04</td>\n",
       "      <td>4.935200e+04</td>\n",
       "      <td>4.935200e+04</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.0</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.21218</td>\n",
       "      <td>1.541640</td>\n",
       "      <td>3.830174e+03</td>\n",
       "      <td>1.697863e+03</td>\n",
       "      <td>1.657567e+03</td>\n",
       "      <td>-0.329460</td>\n",
       "      <td>2.753820</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>5.014852</td>\n",
       "      <td>15.206881</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003080</td>\n",
       "      <td>0.000385</td>\n",
       "      <td>0.186477</td>\n",
       "      <td>0.009361</td>\n",
       "      <td>0.000446</td>\n",
       "      <td>0.028165</td>\n",
       "      <td>0.002026</td>\n",
       "      <td>0.001013</td>\n",
       "      <td>0.000952</td>\n",
       "      <td>1.616895</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.50142</td>\n",
       "      <td>1.115018</td>\n",
       "      <td>2.206687e+04</td>\n",
       "      <td>1.100477e+04</td>\n",
       "      <td>7.817996e+03</td>\n",
       "      <td>0.947732</td>\n",
       "      <td>1.446091</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.824442</td>\n",
       "      <td>8.280749</td>\n",
       "      <td>...</td>\n",
       "      <td>0.055412</td>\n",
       "      <td>0.019618</td>\n",
       "      <td>0.389495</td>\n",
       "      <td>0.101625</td>\n",
       "      <td>0.021109</td>\n",
       "      <td>0.165446</td>\n",
       "      <td>0.044969</td>\n",
       "      <td>0.031814</td>\n",
       "      <td>0.030846</td>\n",
       "      <td>0.626035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.300000e+01</td>\n",
       "      <td>2.150000e+01</td>\n",
       "      <td>4.300000e+01</td>\n",
       "      <td>-5.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.500000e+03</td>\n",
       "      <td>1.225000e+03</td>\n",
       "      <td>1.066667e+03</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.150000e+03</td>\n",
       "      <td>1.500000e+03</td>\n",
       "      <td>1.383417e+03</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>4.100000e+03</td>\n",
       "      <td>1.850000e+03</td>\n",
       "      <td>1.962500e+03</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>10.00000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>4.490000e+06</td>\n",
       "      <td>2.245000e+06</td>\n",
       "      <td>1.496667e+06</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>13.500000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>31.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 228 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         bathrooms      bedrooms         price  price_bathrooms  \\\n",
       "count  49352.00000  49352.000000  4.935200e+04     4.935200e+04   \n",
       "mean       1.21218      1.541640  3.830174e+03     1.697863e+03   \n",
       "std        0.50142      1.115018  2.206687e+04     1.100477e+04   \n",
       "min        0.00000      0.000000  4.300000e+01     2.150000e+01   \n",
       "25%        1.00000      1.000000  2.500000e+03     1.225000e+03   \n",
       "50%        1.00000      1.000000  3.150000e+03     1.500000e+03   \n",
       "75%        1.00000      2.000000  4.100000e+03     1.850000e+03   \n",
       "max       10.00000      8.000000  4.490000e+06     2.245000e+06   \n",
       "\n",
       "       price_bedrooms     room_diff      room_num     Year         Month  \\\n",
       "count    4.935200e+04  49352.000000  49352.000000  49352.0  49352.000000   \n",
       "mean     1.657567e+03     -0.329460      2.753820   2016.0      5.014852   \n",
       "std      7.817996e+03      0.947732      1.446091      0.0      0.824442   \n",
       "min      4.300000e+01     -5.000000      0.000000   2016.0      4.000000   \n",
       "25%      1.066667e+03     -1.000000      2.000000   2016.0      4.000000   \n",
       "50%      1.383417e+03      0.000000      2.000000   2016.0      5.000000   \n",
       "75%      1.962500e+03      0.000000      4.000000   2016.0      6.000000   \n",
       "max      1.496667e+06      8.000000     13.500000   2016.0      6.000000   \n",
       "\n",
       "                Day       ...                walk         walls           war  \\\n",
       "count  49352.000000       ...        49352.000000  49352.000000  49352.000000   \n",
       "mean      15.206881       ...            0.003080      0.000385      0.186477   \n",
       "std        8.280749       ...            0.055412      0.019618      0.389495   \n",
       "min        1.000000       ...            0.000000      0.000000      0.000000   \n",
       "25%        8.000000       ...            0.000000      0.000000      0.000000   \n",
       "50%       15.000000       ...            0.000000      0.000000      0.000000   \n",
       "75%       22.000000       ...            0.000000      0.000000      0.000000   \n",
       "max       31.000000       ...            1.000000      1.000000      1.000000   \n",
       "\n",
       "             washer         water    wheelchair          wifi       windows  \\\n",
       "count  49352.000000  49352.000000  49352.000000  49352.000000  49352.000000   \n",
       "mean       0.009361      0.000446      0.028165      0.002026      0.001013   \n",
       "std        0.101625      0.021109      0.165446      0.044969      0.031814   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "max        2.000000      1.000000      1.000000      1.000000      1.000000   \n",
       "\n",
       "               work  interest_level  \n",
       "count  49352.000000    49352.000000  \n",
       "mean       0.000952        1.616895  \n",
       "std        0.030846        0.626035  \n",
       "min        0.000000        0.000000  \n",
       "25%        0.000000        1.000000  \n",
       "50%        0.000000        2.000000  \n",
       "75%        0.000000        2.000000  \n",
       "max        1.000000        2.000000  \n",
       "\n",
       "[8 rows x 228 columns]"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "#数据准备\n",
    "y_train = train['interest_level']\n",
    "X_train = train.drop([\"interest_level\"], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "#划分数据集\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train_part, X_val, y_train_part, y_val = train_test_split(X_train, y_train, train_size = 0.33,random_state = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义函数\n",
    "def modelfit(alg, X_train, y_train, nfold=5, early_stopping_rounds=10):\n",
    "    xgb_param = alg.get_xgb_params()\n",
    "    xgb_param['num_class'] = 3\n",
    "    #直接调用xgboost，而非sklarn的wrapper类\n",
    "    xgtrain = xgb.DMatrix(X_train, label = y_train) \n",
    "    cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=5,\n",
    "             metrics='mlogloss', early_stopping_rounds=early_stopping_rounds)\n",
    "    cvresult.to_csv('1_nestimators.csv', index_label = 'n_estimators')\n",
    "    #最佳参数n_estimators\n",
    "    n_estimators = cvresult.shape[0]\n",
    "    # 采用交叉验证得到的最佳参数n_estimators，训练模型\n",
    "    alg.set_params(n_estimators = n_estimators)\n",
    "    alg.fit(X_train, y_train, eval_metric='mlogloss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "xgb1 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=1000,  #数值先随便给一个，cv会自动返回合适的n_estimators\n",
    "        max_depth=6,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample = 0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel=0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "modelfit(xgb1, X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'base_score': 0.5,\n",
       " 'booster': 'gbtree',\n",
       " 'colsample_bylevel': 0.7,\n",
       " 'colsample_bytree': 0.8,\n",
       " 'gamma': 0,\n",
       " 'learning_rate': 0.1,\n",
       " 'max_delta_step': 0,\n",
       " 'max_depth': 6,\n",
       " 'min_child_weight': 1,\n",
       " 'missing': None,\n",
       " 'n_estimators': 263,\n",
       " 'nthread': 1,\n",
       " 'objective': 'multi:softprob',\n",
       " 'reg_alpha': 0,\n",
       " 'reg_lambda': 1,\n",
       " 'scale_pos_weight': 1,\n",
       " 'seed': 3,\n",
       " 'silent': 1,\n",
       " 'subsample': 0.5}"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb1.get_xgb_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: from_csv is deprecated. Please use read_csv(...) instead. Note that some of the default arguments are different, so please refer to the documentation for from_csv when changing your function calls\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XmcXFWd9/HPr6q3JN2d7iSdfQWSQEAJEMOiYgSUgA64DRDEEZdh4JFxmXEceIaX8jDDDDqKoqKCiowbCMgoIq4gCohIgLCThZClsy/d6XR67/o9f5xbnUqnl0qnq2911/f9el266q7n1g31rXPuveeauyMiIgKQiLsAIiKSPxQKIiLSRaEgIiJdFAoiItJFoSAiIl0UCiIi0kWhIJLBzP6vmX0n7nKIxEWhMMyYWbmZrTOzizPGVZjZBjN7X8a4RWZ2v5nVmVm9mb1kZtebWXU0/VIz6zSzxmhYa2ZX5LjsS8ysNpfbOBQ9lcfd/9PdP5qj7a0zs7Nyse5cGKrjNdw+l5FOoTDMuHsjcBlwk5nVRKO/ACx393sAzOw04GHgMeBod68ClgIdwPEZq3vc3cvdvRx4H/AFMzthaPZEDoWZFcVdBikQ7q5hGA7A7cAdwBJgFzAlY9qjwNf6Wf5S4NFu4/4KXJzx/jzgRaCeEDLHZEw7JhpXH81zXsa0c4GXgL3AJuDTwBigGUgBjdEwtZf9uhn4ZbT8E8CRWXweRwO/A3YDK4ELBlIe4Frgh9FyswEHPgRsBOqAy4E3AM9F+/71jO0cCTwUHY+dwI+AqmjaD6JtNUfb+kwWn/E64F+jbbUCRdH7TdG+rATO7OGzOAXYCiQzxr0beC56vRhYDjQA24Abe/lMlwC1vUwbC3wf2AGsB64BEtG0JPCl6DN4Dbgy+hyLelnXOuCsXqb9PbAmOq73pf/NAAZ8GdgO7Ik+o+N6O95x//86nIbYC6BhgAcOqoEt0f94H8oYPwboBJb0s/ylZIRC9EVXD8yL3s8D9gFvA4qBz0T/c5ZE79cA/zd6f0b0P+D8aNktwJszynli9LrXL5mMctwefQEsjr4EfwTc2c8yYwhf2h+Kljkx+lyOPdTy0HMofAsoA94OtAA/AyYC06IvpbdE8x8VfV6lQA3wJ+ArGes+4Muvr884Y/4VwAxgFDA/2s+pGeXrMTCBV4G3Zby/G7gqev048IHodTlwSi/r6PV4EQLh50BFVI5VwEeiaZcTvpSnR5/37xlAKET/rnZGx7MU+Brwp2ja2cBTQBUhII4h+mHU2/HWkN2g5qNhyt3rCL8wRwP3ZkyqJjQLbk2PMLMvROcV9pnZNRnznhKNbyTUEn4ArI6mXQj80t1/5+7twBcJX0ynEX6JlgM3uHubuz8E3A8si5ZtBxaYWaW717n704e4e/e6+1/dvYMQCgv7mf+dwDp3/567d0Tb+ymhSWwwyvPv7t7i7r8lfInf4e7b3X0T8AhwAoC7r4k+r1Z33wHcCLylj/X29RmnfdXdN7p7MyHsS6N9KXb3de7+ai/rvoPoeJhZBeHX8x0Zn8dRZjbB3Rvd/S+H8mGYWTIq+9Xuvtfd1xFqBh+IZrkAuMnda6N/pzccyvozvB+4zd2fdvdW4GrgVDObHe1DBaGGaO7+srtvydi/wzneBU2hMEyZ2SWEX2i/Bz6fMamO0EwxJT3C3T/j4bzC/xJ+Saf9xd2rPJxTmAwcC/xnNG0qoVkgvY4U4VfqtGjaxmhc2vpoGsB7CV9C683sj2Z26iHu3taM102EAOrLLODkKODqzaye8IUyeZDKsy3jdXMP78sBzGyimd1pZpvMrAH4ITChj/X29RmnbcyYvgb4JKE2sz3a1tRe1v1j4D1mVgq8B3ja3dPb+gihlvKKmT1pZu/so4w9mUCoIa7PGJd5/Kdmlrvb60PR/fNpJDTNTYt+iHyd0NS4zcxuNbPKaNbDPd4F
TaEwDJnZREJ76t8D/wBcYGanA7j7PkI7/HsOZZ3uvo3w6/pvolGbCV+26W0aoRljUzRthpll/vuZGU3D3Z909/MJTSw/A+5Kb+ZQynQINgJ/jAIuPZS7+xVDXJ7/itb5enevBC4hNG2kdd9eX59xj8u4+4/d/U3Rcs6BPwgy53uJ8IV6DnAxISTS01a7+zLC5/F54B4zG5P9brKT8Gt8Vsa4ruNPaL6ZnjFtxiGsO1P3z2cMMJ79/86+6u4nEX7MzAP+JRrf2/GWLCgUhqevAz9z9z9EVebPAN+OfhUSvf+wmV0VBQhmNh2Y09sKzWw84WTki9Gou4B3mNmZZlYM/DPhZOefCaGzD/iMmRWb2RJCmNxpZiVm9n4zGxs1iTQQmj0g/MIeb2ZjB+lzSLsfmGdmH4jKU2xmbzCzY4a4PBWEk8j1ZjaN6EsqwzbgiIz3fX3GBzGz+WZ2RnScWwi1lM6e5o38GPg4cDrhnEJ6PZeYWU1UM6mPRve6HjMryxwINdG7gOujy6FnAf9EqBml9+sTZjbNzKoIJ8f7U9xtO0VR+T9kZgujff5P4Al3Xxcd35Ojz21f9Hl09nO8JRtxn9TQcGgD8C7CL6iqbuMfBK7PeH8y8ADhf/p64AXgemB8NP1Swv8s6StvthPanCdmrOPdhBOGe4A/Ep24jaYdG43bE83z7mh8CfBrQjNWA/Ak8KaM5W4jNAHU0/vVR/+R8X4J/ZycjuabT7hiaUe0/ocI5yIOqTz0fKK5KGP+WjJO4hO+CK/J+Eyeij7PFYQv+dqMec8HNkTb+nQWn/E6Djwx/XrCuZ+9hJPx9/f0GWbMP5PwBf7LbuN/GB3vRsKPgHf1svySaP+7D0cRzl39MPq8NwKfZf/VR0WEmuwuwtVHnyLULKyX7azrYRv/EU27nHDSPL2/06PxZxKuOGpk/5Ve5f0dbw39DxZ9wCIiOWFm5wDfcvdZ/c4ssVPzkYgMKjMbZWbnmllR1Iz2OcJFDjIMqKYgw4KZvRn4VU/TPFw9JXnCzEYTmsKOJpz3+CXwCXdviLVgkhWFgoiIdFHzkYiIdBl2nWxNmDDBZ8+eHXcxRESGlaeeemqnu9f0N9+wC4XZs2ezfPnyuIshIjKsmNn6/udS85GIiGRQKIiISBeFgoiIdFEoiIhIF4WCiIh0USiIiEgXhYKIiHQpmFDY09TOS5vV9YqISF8KJhS+++WrmXLLMbS0NMddFBGRvFUwofDmuTVUWyMNdTvjLoqISN4qmFAoLh8HQGP9jphLIiKSvwomFEoqxgPQtEc1BRGR3hRMKJRVTACgtUGhICLSm4IJhTFVIRTa99XFXBIRkfxVMKFQUR26Ee/ctzvmkoiI5K+CCYVR5dWk3PBm1RRERHpTMKFgySIabTSJFoWCiEhvCiYUABqtgmSr7moWEelNzkLBzG4zs+1m9kIv083Mvmpma8zsOTM7MVdlSWtKVlDSvifXmxERGbZyWVO4HVjax/RzgLnRcBnwzRyWBYCW4rGM6lBNQUSkNzkLBXf/E9DXpT7nA9/34C9AlZlNyVV5ANqLKxmdUiiIiPQmznMK04CNGe9ro3EHMbPLzGy5mS3fsWPg3VR0llZRnmoc8PIiIiNdnKFgPYzznmZ091vdfZG7L6qpqRnwBr2sirE00trePuB1iIiMZHGGQi0wI+P9dGBzLjdoo8eRNKdhjy5LFRHpSZyhcB/wd9FVSKcAe9x9Sy43WDSmGoDGOvWUKiLSk6JcrdjM7gCWABPMrBb4HFAM4O7fAh4AzgXWAE3Ah3JVlrT6Z34GqPtsEZHe5CwU3H1ZP9Md+Fiutt+TWX9zNdz3Hlrqtw/lZkVEho2CuqO5YvxkAFr3qvtsEZGeFFQoVI4LoZBqVPORiEhPCioUSsZU00EC9qmmICLSk4IKBRIJGqhQT6kiIr0orFAAGpNVlLbqQTsiIj0puFBoKq6irL0+7mKIiOSlgguFtpJqyjsVCiIiPSm4UOgYNZ6x
vodwm4SIiGQquFBg9Hiq2EdDU2vcJRERyTsFFwrJ8gkkzKnbtS3uooiI5J2CC4XiiokANO5WKIiIdFdwoTCqKjyPoaleoSAi0l3BhUJ51NXF/Y8/H3NJRETyT8GFwtjxUwFYMr2nB7+JiBS2gguFksqJpDBoVPORiEh3BRcKJIvYY2MpatIzFUREuiu8UAD2Fo2jtHVX3MUQEck7BRkKTSXjKW9XKIiIdFeQodA+qoaqVJ26uhAR6aYgQyE1ZiITqGdPU1vcRRERySsFGQrJykmUWge7dulks4hIpoIMhZKxUwDYs2NTzCUREckvBRkKY6Ib2Jp2bY65JCIi+aUgQ2FszXQAWuu3xFwSEZH8ktNQMLOlZrbSzNaY2VU9TJ9lZg+a2XNm9rCZTc9ledLGjAs1hedXrhqKzYmIDBs5CwUzSwI3A+cAC4BlZrag22xfBL7v7q8HrgP+K1flOaBso6po8yKO9dVDsTkRkWEjlzWFxcAad1/r7m3AncD53eZZADwYvf5DD9Nzw4y6ohpKi4uHZHMiIsNFLkNhGrAx431tNC7Ts8B7o9fvBirMbHz3FZnZZWa23MyW79ixY1AK11haQ0WbLkkVEcmUy1DoqW/q7rcQfxp4i5k9A7wF2AR0HLSQ+63uvsjdF9XU1AxK4dpGTWZcaiedKd3VLCKSlstQqAVmZLyfDhxwDai7b3b397j7CcC/ReP25LBMXVKVU5lMHTsaWoZicyIiw0IuQ+FJYK6ZzTGzEuAi4L7MGcxsgpmly3A1cFsOy3OA4qpplFo727frXgURkbSchYK7dwBXAr8BXgbucvcXzew6Mzsvmm0JsNLMVgGTgOtzVZ7uRo2fCUDDtg1DtUkRkbxXlMuVu/sDwAPdxn024/U9wD25LENvxk4KodC0S6EgIpJWkHc0A1RMDKHQUaf+j0RE0go2FKx8Mp1u1G54Ne6iiIjkjYINBZJF7LQqJqT0BDYRkbTCDQWg1UYxzbfGXQwRkbxR2KEw4VhqqKOlvTPuooiI5IWCDgWqZjLNdlG7uzHukoiI5IWCDoXS8bMotXa2bdnY/8wiIgWgoEOhYvKRADRsWRtzSURE8kNBh8LYyUcA0LpzXbwFERHJEwUdConqcAMb9bqrWUQECjwUKC1nNxW0qKYgIgIUeigA22wiU1wP2xERAYUCZUmYxg7qm9riLoqISOwKPhTsyDOYYdt5bXtD3EUREYldwYfC6MlHUWodbNuky1JFRAo+FKqmzwdg7+Y1MZdERCR+BR8KJTXhBraOnepCW0Sk4EOBymm0U0xJw7q4SyIiEjuFQiJJLRMZvW8D7h53aUREYqVQALYkpjCTbexs1GWpIlLYFArArLnHMcu2sXqbLksVkcKmUAAqps2n3FrYtPG1uIsiIhIrhQJQMX0BAHtrX4y5JCIi8VIoAFZzdHixY1W8BRERiVlOQ8HMlprZSjNbY2ZX9TB9ppn9wcyeMbPnzOzcXJanV+WTaEmMoXyv7lUQkcKWs1AwsyRwM3AOsABYZmYLus12DXCXu58AXAR8I1fl6ZMZrzKNaR217GpsjaUIIiL5IJc1hcXAGndf6+5twJ3A+d3mcaAyej0W2JzD8vRpU9EMjkpsYuXWvXEVQUQkdv2GgpkdaWal0eslZvZxM6vKYt3TgI0Z72ujcZmuBS4xs1rgAeAfeynDZWa23MyW79ixI4tNH7o3nnwqk6ye1Rs35WT9IiLDQTY1hZ8CnWZ2FPBdYA7w4yyWsx7Gdb9leBlwu7tPB84FfmBmB5XJ3W9190XuvqimpiaLTR+6Ma/8FIA9657PyfpFRIaDbEIh5e4dwLuBr7j7p4ApWSxXC8zIeD+dg5uHPgLcBeDujwNlwIQs1j343n9X+Ltdl6WKSOHKJhTazWwZ8EHg/mhccRbLPQnMNbM5ZlZCOJF8X7d5NgBnApjZMYRQyE37UH/GzqA1OYZxjatp7eiMpQgiInHLJhQ+BJwKXO/ur5nZHOCH/S0U
1S6uBH4DvEy4yuhFM7vOzM6LZvtn4O/N7FngDuBSj6tXOjP2VR3NPNvAe7/x51iKICISt6L+ZnD3l4CPA5hZNVDh7jdks3J3f4BwAjlz3Ge7rfuNh1LgXCqaehxH77yLfS3tcRdFRCQW2Vx99LCZVZrZOOBZ4HtmdmPuizb0KmYupNKaOWuaQkFEClM2zUdj3b0BeA/wPXc/CTgrt8WKh00+DoDmlQ/GXBIRkXhkEwpFZjYFuID9J5pHpknHkiLJxNQ26pv0bAURKTzZhMJ1hJPFr7r7k2Z2BLA6t8WKSckYmqvnsdBe5ZkN9XGXRkRkyPUbCu5+t7u/3t2viN6vdff35r5o8SiZtZjjE69yzb3Pxl0UEZEhl82J5ulm9r9mtt3MtpnZT81s+lAULg7FMxcx1poY11Ybd1FERIZcNs1H3yPcdDaV0HfRL6JxI9O0RQAsSK2hozMVc2FERIZWNqFQ4+7fc/eOaLgdyE0HRPmgZj4dFHFMahUrt6nHVBEpLNmEwk4zu8TMktFwCbAr1wWLTSJJx7TFLEy8ysd+9HTcpRERGVLZhMKHCZejbgW2AO8jdH0xYpXOegMLbD0tzc1xF0VEZEhlc/XRBnc/z91r3H2iu7+LcCPbiGXTF1FiHUxrW0NcXTGJiMRhoE9e+6dBLUW+mXYSAMf5GlZta4y5MCIiQ2egodDTA3RGjrHT6EyUckJiNY+sjqcnbxGROAw0FEZ8m0rymHdwSuIVvvbgyLx5W0SkJ72GgpntNbOGHoa9hHsWRrbZb2Ky7aa6tZZ9rR1xl0ZEZEj0GgruXuHulT0MFe7e73MYhr3ZbwbglMRLPLxSTUgiUhgG2nw08k2Yi5dP4tTEy3z25y/EXRoRkSGhUOiNGZZK8dbEM9Q3tei5zSJSEBQKfVn6n1RaE69jLX9eM3Jv4hYRSVMo9OWos3BLsLRkBb95cWvcpRERyblsus7u6SqkjVF32kcMRSFjM3ocNuMUltjT3PNULZ2pEX8lrogUuGyuIroR2Az8mHDT2kXAZGAlcBuwJFeFywsNmzmadUxI7eSxNTs5fd7I7SBWRCSb5qOl7n6Lu+919wZ3vxU4191/AlTnuHzxu/gnAJxVtIK7n9KDd0RkZMsmFFJmdoGZJaLhgoxpfbanmNlSM1tpZmvM7Koepn/ZzFZEwyozy78HI9fMh+rZfGDcK9z/3Gbe+43H4i6RiEjOZBMK7wc+AGyPhg8Al5jZKODK3hYysyRwM3AOsABYZmYLMudx90+5+0J3Xwh8Dbh3QHuRS2Ywbylz9y2n1FvZ2dgWd4lERHKm33MK7r4W+JteJj/ax6KLgTXR8pjZncD5wEu9zL8M+Fx/5YnFukdJdLbyztJnuH/vaaRSTiIxsvsEFJHClM3VR9OjK422m9k2M/upmU3PYt3TgI0Z72ujcT1tYxYwB3gom0IPuX94BCqn8+lJT9PSnuKcmx6Ju0QiIjmRTfPR94D7CJ3gTQN+EY3rT08/pXs7B3ERcI+793jbsJldZmbLzWz5jh0x9EOUSMDxFzJpx5+Zmqxn855mPXxHREakbEKhxt2/5+4d0XA7kM11mbXAjIz30wmXtvbkIuCO3lbk7re6+yJ3X1RTE9MloccvwzzFfxTfxt6WDtUWRGREyiYUdprZJWaWjIZLgGz6fHgSmGtmc8yshPDFf1/3mcxsPuHS1scPpeBDbsJcmLaIJTX7KE5CbZ1qCyIy8mQTCh8GLgC2AluA9wEf6m8hd+8gXJ30G+Bl4C53f9HMrjOz8zJmXQbc6cPhG3bhMhI7XuYtFVtpbFVtQURGHhvId7GZfdLdv5KD8vRr0aJFvnz58jg2DU274QtH0DlmEsfUf4XipPH8tWfrSiQRyXtm9pS7L+pvvoF2iPdPA1xueBs9Dk78O5ItdbxubCv72jp525f/GHepREQGzUBDoXB/Gp/2j9DZxj0nPc/okiQbdjfR0NIed6lERAbFQEMh/9v/c2XCXBhVjT32
FY4el6S90znzi6otiMjI0Gso9NJldoOZ7SXcs1C4lt0JqQ7uPXUtU8aWsaOxlbO//EcuvCW/L6ASEelPr6Hg7hXuXtnDUOHu2XS5PXLNPBlKK+G31zBnbJKy4gSv7WyiozMVd8lERA6Lnrw2UBf+EDrb+PHC57nzslNp70yxonYPF3zrz3GXTERkwBQKA3XEW6CsCn5/LQtrjFnjR9OZclZsrFczkogMWwqFw/HB+yDVAY/dxMP/8lYmVpTS1uns2Nsad8lERAZEoXA4phwPo2vg0S9D/QYe/dczqCwrYu3OfZz5pYfjLp2IyCFTKByuyx6ColFw/6coSRrzJlWQTBiv7tjHGV98OO7SiYgcEoXC4aqaCWd9Dtb8Hr6+mHuuOI0TZlSRTBhrdyoYRGR4USgMhjd8FGacDE07oHE791xxGidmBMNpNzxIKlW49/uJyPChUBgMiSSc9zVoroevL4ZUiruvOI0TZ1ZRnDQ217dw4r//juM+92tdmSQieU2hMFhq5sM7vgQtdXDT8QDcfXloSpo9fjR7mtvZ19rJc7W6ZFVE8pdCYTAt+nC4GmnPBlj7MAB3XX4aD//LWzl2aiVm0Nye4qkNdRyrWoOI5CGFwmAyg0+sgOJR8MP3wu61XZPu//ibOWlmNSVJo6PT2dfayYqN9fyt7oAWkTyiUBhspeVw+WPh9TdOg+a6rkl3X3EaJ8ysZkxJkmTCaO1I8VztHs744sM63yAieWFAT16LU6xPXjsU6/8M3z8/XJX0/nuguOyAyRfe8jjP1dZjZjS1dWJASVGC4qRx7NSx/OQfTo2n3CIyIuX6yWvSn1mnQdVsWPcI/OQSaG85YPJP/uFUXj+9iuOmVjJ/UjkJg9aOFI2tnTyzoY5jP6uag4gMPdUUcu2rJ8HuNTD37XDBDw6qMaRdeMvjPL9pD20dKTqiexrKS4sYP6aEnftaSZixYEqlahAiMiDZ1hQUCkPhqdvhF58Iz2D4xLPhWc+9uPCWx3lh8x7aO52kGc3tnQAkDIqTCYoSRiKhgBCRQ6NQyDfP3wM//SgUlcIVf4bxR/Y5e7rpqLmtk1e27aWjM0X6pmgzmFhRStWoYjbVN2OqRYhIPxQK+WjDE3D7ueH1390Hs9+Y1WIX3vI4L21pIJVyOqLB3btCIpmwMFh4fezUsV3LKihEBBQK+Wv32nCpakcLvP3f4dQrw0//LKUD4uhJFext7eDVHY10pvYHBEBZcYLy0iL2tnSQTBgJA4u2oRqFSGHKi1Aws6XATUAS+I6739DDPBcA1wIOPOvuF/e1zmEfChD6SPraidC0C+afC+/6BoyqHtCq0iHh7nSmnE6H0SVJGls6uk5YQzgnkUwY06tGMaokyYbdTV1BkabAEBm5Yg8FM0sCq4C3AbXAk8Ayd38pY565wF3AGe5eZ2YT3X17X+sdEaEA4A5fPQHq1kHVDDj/Zphz+mGtMh0QC6ZU4u68uLmBzigsUlFgZDIgkVGTSBgcVVNOaVGCVdsbD5hXgSEyvOVDKJwKXOvuZ0fvrwZw9//KmOcLwCp3/0626x0xoZBWuzycZ+hohdET4PJHoHLqoK0+HRQA7s5RNeU0tXeyblcTqZSTcsc9VNO6SxgkbH9ozJkwhtKiBK/uaOyxlpGm8BDJP/kQCu8Dlrr7R6P3HwBOdvcrM+b5GaE28UZCE9O17v7rvtY74kIBoL0ZHv0K/PHzYAk461o45QpIFudkc5lBkZY+cT2tahStHZ1s2dNCyukKje6McCrEzMLVUOWlFBcl2NbQQiIaF+axrsDovk3VPkSGTj6Ewt8CZ3cLhcXu/o8Z89wPtAMXANOBR4Dj3L2+27ouAy4DmDlz5knr16/PSZljt3stfPuM0F9SzdFw7n8fdpPSoci8gzrzC9zdcWBG9WhaO1Jsqm8O4zJCo69/RWVFCYqSCZrbOw8ICwNmjhtNcdJYt6upaxoceGK8e3nSVDsRyV4+hEI2zUffAv7i7rdH7x8E
rnL3J3tb74isKXS38ldw9wdDk9Kx7wk1h+pZcZcK6LmWAXDM5Ao6Us4rW/d21TqcECgVZcV0dKZobO3oN0DSLPrP6OLQeWBTWyekAwXAjKljy0gmLNyrwf6gweCYyZUkDF7euvegdWcTNL1NU/jIcJUPoVBEaBo6E9hEONF8sbu/mDHPUsLJ5w+a2QTgGWChu+/qbb0FEQoQmpQeuwkevgFwKJ8MH/19OCmd57oHR/cv2vS/OQfmjB9De6ezfve+/YER1UzcYUxpEZ0pp6kt+0Dpbn/tI/wdU1pE0ozG1o6uoEnPN6myjIQZWxtauuZPh9ARE8aQMGPtjsYDlzuEWk0up6VDK13jU4BJpthDISrEucBXCOcLbnP3683sOmC5u99n4f+mLwFLgU7gene/s691FkwopDVshkduhCejc/Fv+Ai8+Z8H9WR0vuipFtLTl2L63+y8SRV0ppzV2xsPCpOJFaWk3Nm+tzVaJlqWUPvodKelvfOA8YcrfU9IR6cfEBoQgihh0NjauX98NM+4MSUYxu59rV3JlZ5n6tgyzIzNe5rJWAyAWePHkDBYt6vpgHKkp8+dWA4GqzOuJEtPO3pyJQa8vHX/55rLcMvVeguhrJnTYeBhnxehkAsFFwpp9RvhO2dB41bAYPFlcOrH8qZZKU7Zhkl/09ydYyZXknLnlW17IaNm4u7MGDeaVMqprWvePz5MxAlf7imHun1t+9cZ/R1dksTdQzNY13JhWiJhB9yhHrd0E1z3rwYDipIhODrS1zdn1KZKihIYRmtHZ4/rHF1aBND1GWSqLAvT9rZ0HDStenQxYNQ3tx00bfyYUsxgV2PrAeMdmFBeCsDObtMMGF9eivUwLR3KE8tLwej6UZGxq0yqDOvd1tBtWWByZRkYbN3TctC0qWNDZ5ibM6Z1hX/VqGhac/fFgHABCIROMn9+5Zt6nKc/CoWRqm49fPdt0Lgd8HAZ68V3wfST4i7ZiJQZOEPx6zLz/0cH5k2sIOXOmnRtKMOscaNxYP2ufV3zZ5o6dhSOs7n+4C+oiRXRF1vGl146Bcb38mUKUD26BHeoa247aIOVZUU44Yu9p2+V0SVJcEJTYLdpZcVJ3KG1o/PAaR46gnS8K4gypyeMrtphd4m0ZcQoAAAPyUlEQVToG7ensE1/GQ+vbz+YPX40D//LWwe0bLahUDSgtUt8qmfBp1fBnk3wxLfg8ZvhO2eEHljf9U2Yfw4kknGXcsTIt3b5zJCqHBUuWS5K9vxYlJqK9Jf7wb+w079M65vbD5o2c9xogHDOpZs5E8YA8NKWg3/tz51UEU3L/yaZvpolAY6Jpr/cw7LzJ1cCzsoeLmKYF30GK7f1MG1iBQ6s3t5tmsNRE8sBWNPtptG0I2vC9OJk9l3iDJRqCsNd6154+gfw+2uhsxWKysI5h+MvgqqZcZdORPKEmo8KTWcHvHwf3P9JaNkTxpWNhaWfhwXnQcmYeMsnIrFSKBSyuvXw3E/gkS+F3lgtCccvg4UXw8xTIaGnsIoUGoWChLNvGx6HFT+CFXeAd0KyFE65HBacD1NPPKRuu0Vk+FIoyIHa9sHL94caxKsPEa67sHC/wwXfh2knKSBERjCFgvSuaTesfAB+e03oZwlCDWLRh0MNYsbJamISGWEUCpKd5rrQ19Jv/g2ad4dxyWIYNT4842HWaVAyOt4yishhUyjIoWtpgNW/hZd+Dq/cD54CDI5YAkedCUedFXpvVTOTyLCjUJDD094M6x+DNQ/BU7eF9wDJEiirhnNuCGExelycpRSRLCkUZHDtqYU1D8KrD4ZaRCq6o7WkAk79P3DkGTBtESR1k7xIPlIoSO50dsDmp2HN7+Hxb0BbdNu+JcMNc0uuhtlvCk1NOmEtkhcUCjJ0muvgtT+FS11X3BG62wBIFMH8c2H2mxUSIjFTKEh86taH8xHrHoXn7zkwJI5+RwiJWadBzTEKCZEholCQ/FG3HtY9EkLihXv3h4Qlw7mImSfD
jFPCDXS6/FUkJxQKkp/coW5d6H5jw19g4xOw45X900vK4YRLYMbicBPd2OmxFVVkJFEoyPDRtBs2/hVq/wp//Ta0ZvRhnywJ5yVmLIbpi2HK66GoNL6yigxTCgUZvjrbYdsLISg2PhH6bOrMeApYSQWc+AGYshCmHA8T5urBQiL9UCjIyLJ3K9Q+GYLiqdsPrE1ACIrXvQ8mvw4mvx4mLdAzJEQyKBRkZOvsgF2rYcuzsHlF6B68e1AUjQrBcOrHQlBMfh1UTIqnvCIxUyhI4XGH+g2w9fkwbHsBVv/uwKanRDHMOT2qUUS1ivFHqvlJRrxsQ0F9EsjIYQbVs8JwzDv3j2+ug20vhqB45MZw1VPXMyUian4SAVRTkELV0QY7V+6vVTzzI2jdc+A8RaNg/jn7g0LNTzKM5UXzkZktBW4CksB33P2GbtMvBf4b2BSN+rq7f6evdSoUJGfcYc/G/UHxxC3RQ4gy/h9JFIcaxEmXqvlJhpXYQ8HMksAq4G1ALfAksMzdX8qY51Jgkbtfme16FQoy5Jrrw/mJdFi8+DNo33fgPMWjYd5SqJkPE+aFv+OP0j0Vkjfy4ZzCYmCNu6+NCnQncD7wUp9LieSbUVWhQ7/Zbwrv3/WNjOanF0JgrPgRrPwlvHjvgcsWlcGRZ0LNvNAh4IR5YSgtH/r9EMlCLkNhGrAx430tcHIP873XzE4n1Co+5e4bu89gZpcBlwHMnDkzB0UVOURFJfuvYAI4+/rwt60pXCq7Y1UIjeW3wapfhWdiZzZDJUv39xybGRh6aJHELJeh0NMzG7u3Vf0CuMPdW83scuB/gDMOWsj9VuBWCM1Hg11QkUFTMjrcZT3l+PD+jGvC38522L0WdqwMw86V8MoD4aFFmRLFMPOUqBlq/v7AKJ+kx6DKkMhlKNQCMzLeTwc2Z87g7rsy3n4b+HwOyyMSn2Rx+KKvmX/g+FQK9mw4MCxe/HnoVba70gpYcH4UFlENY+wMneSWQZXLUHgSmGtmcwhXF10EXJw5g5lNcfct0dvzgJdzWB6R/JNIQPXsMMw7O4w7/+ZwJdTerSEkdqwKPck+f0+4dPaACreFGsSoajjxg6EfqPFHhUFNUTIAOQsFd+8wsyuB3xAuSb3N3V80s+uA5e5+H/BxMzsP6AB2A5fmqjwiw4oZVE4JwxFLwrh33hj+Nu2GnatCzWLX6hAUzXXw6I0HriNRFJ5RMX5uuGx2wtzwetwcXRUlvdLNayIjRWcH1K+Hnath15oQGC/cC617Oeh0XlFZuDmveBSc/s9RcBwFlVN17mKEiv0+hVxRKIgMQEtDFBSvhrDYtQZW/Rra9h08b/EYmPf2EBLjjgy1DDVHDXv5cJ+CiOSLskqYdmIYMrnD3i37axd/+iJ0NMPKX0FHy8HrKSkPDz1KB8W4I8LrsrFDsx+Sc6opiEjPOtpCc9SuV2H3q/DYV0NgtLcc2PMsABYFxlKonhPOW1TPCSfQKyarSSoPqKYgIoenqCScnJ4wN7w/9WP7p7W3QN1rUXPUGvjLN0LN4qX7eggMQjcgc07fHxTVs0NwVM2C4rKh2BvJkmoKIjK4OtrCcy3qXoO6dWFY8eMQGh0t4KluC1i4B+OY82Dc7ANrGqOqVcsYJDrRLCL5xx327QyBsfu18PfJ70aB0Rzu/O6uZAwcddb+Wsa46G/ldEiqsSNbaj4SkfxjBuU1YZixOIxbctX+6W379tcudqdrGq/Bqt+G0OiuqAxmnXZwYFTPUaeDA6RQEJH8UTIGJh0bhu5SndCw6eDAWP276El63SSKYerCboERNU2pL6leKRREZHhIJKFqZhjmnH7w9Oa6nmsZL/384JPflohu4CuD45dFj3GdHU58V80MHRsWKIWCiIwMo6rDMPWEg6d1tIWn6qXPY6T/rn0Y/nLzwfMni2HKCVFTVEZgVM8Od32P4E4IFQoiMvIVlUQ33B158DR32LcjqlmsD3/r18FLv4DaJzm4x38LfUcV
lcGx7z44NIb5FVO6+khEpC+d7aGW0RUY0d/Vv4u6Cen2HWrJ6Il7b91/T0bVrBAeVTNDf1Mx0NVHIiKDIVkcuvMYd0TP01saoqBYvz8wnr87PG3voHsyINyXUQ5zz94fFOnQqJweajUxUk1BRCRX3KFxW0YtY8P+pqnWBg5umoKum/mOOiucv+gaph1Wx4S6eU1EJN91doTLbOs3HFjbWPlraNt7cE1j3JHw8acHtCk1H4mI5LtkUXSiehbw5oOnu4dLbRs2h6H741xzQKEgIpKvzEJz0ehxMPm4IdlkYki2IiIiw4JCQUREuigURESki0JBRES6KBRERKSLQkFERLooFEREpItCQUREugy7bi7MbAewfoCLTwB2DmJx8pX2c+QplH3VfubOLHev6W+mYRcKh8PMlmfT98dwp/0ceQplX7Wf8VPzkYiIdFEoiIhIl0ILhVvjLsAQ0X6OPIWyr9rPmBXUOQUREelbodUURESkDwoFERHpUjChYGZLzWylma0xs6viLs9gMrN1Zva8ma0ws+XRuHFm9jszWx39rY67nIfKzG4zs+1m9kLGuB73y4KvRsf3OTM7Mb6SH5pe9vNaM9sUHdMVZnZuxrSro/1caWZnx1PqQ2dmM8zsD2b2spm9aGafiMaPqGPax34Oj2Pq7iN+AJLAq8ARQAnwLLAg7nIN4v6tAyZ0G/cF4Kro9VXA5+Mu5wD263TgROCF/vYLOBf4FWDAKcATcZf/MPfzWuDTPcy7IPr3WwrMif5dJ+Pehyz3cwpwYvS6AlgV7c+IOqZ97OewOKaFUlNYDKxx97Xu3gbcCZwfc5ly7Xzgf6LX/wO8K8ayDIi7/wnY3W10b/t1PvB9D/4CVJnZlKEp6eHpZT97cz5wp7u3uvtrwBrCv++85+5b3P3p6PVe4GVgGiPsmPaxn73Jq2NaKKEwDdiY8b6Wvg/ScOPAb83sKTO7LBo3yd23QPhHCkyMrXSDq7f9GonH+Mqo2eS2jOa/EbGfZjYbOAF4ghF8TLvtJwyDY1oooWA9jBtJ1+K+0d1PBM4BPmZmp8ddoBiMtGP8TeBIYCGwBfhSNH7Y76eZlQM/BT7p7g19zdrDuGGzrz3s57A4poUSCrXAjIz304HNMZVl0Ln75ujvduB/CVXPbemqdvR3e3wlHFS97deIOsbuvs3dO909BXyb/c0Jw3o/zayY8EX5I3e/Nxo94o5pT/s5XI5poYTCk8BcM5tjZiXARcB9MZdpUJjZGDOrSL8G3g68QNi/D0azfRD4eTwlHHS97dd9wN9FV6ycAuxJN0kMR93azt9NOKYQ9vMiMys1sznAXOCvQ12+gTAzA74LvOzuN2ZMGlHHtLf9HDbHNO4z9UM1EK5kWEU4s/9vcZdnEPfrCMKVC88CL6b3DRgPPAisjv6Oi7usA9i3OwjV7HbCr6mP9LZfhCr4zdHxfR5YFHf5D3M/fxDtx3OEL40pGfP/W7SfK4Fz4i7/IeznmwjNIs8BK6Lh3JF2TPvYz2FxTNXNhYiIdCmU5iMREcmCQkFERLooFEREpItCQUREuigURESki0JBRES6KBREsmBmC7t1dXzeYHXBbmafNLPRg7EukcOl+xREsmBmlxJunroyB+teF6175yEsk3T3zsEui4hqCjKimNns6OEm344ecPJbMxvVy7xHmtmvo95lHzGzo6Pxf2tmL5jZs2b2p6hrlOuAC6OHo1xoZpea2dej+W83s29GD1ZZa2ZviXrBfNnMbs/Y3jfNbHlUrv8Xjfs4MBX4g5n9IRq3zMJDk14ws89nLN9oZteZ2RPAqWZ2g5m9FPW6+cXcfKJScOK+JVyDhsEcgNlAB7Awen8XcEkv8z4IzI1enww8FL1+HpgWva6K/l4KfD1j2a73wO2EZ3QYoW/8BuB1hB9dT2WUJd19QxJ4GHh99H4d0UOSCAGxAagBioCHgHdF0xy4IL0uQpcIlllODRoOd1BNQUai19x9RfT6KUJQHCDq1vg0
4G4zWwHcQnhiFsBjwO1m9veEL/Bs/MLdnRAo29z9eQ+9Yb6Ysf0LzOxp4BngWMITt7p7A/Cwu+9w9w7gR4QnswF0EnrehBA8LcB3zOw9QFOW5RTpU1HcBRDJgdaM151AT81HCaDe3Rd2n+Dul5vZycA7gBVmdtA8fWwz1W37KaAo6v3y08Ab3L0ualYq62E9PfWtn9bi0XkEd+8ws8XAmYRef68EzsiinCJ9Uk1BCpKHh568ZmZ/C10PiT8+en2kuz/h7p8FdhL6ut9LeN7uQFUC+4A9ZjaJ8ECktMx1PwG8xcwmmFkSWAb8sfvKoprOWHd/APgk4cEtIodNNQUpZO8Hvmlm1wDFhPMCzwL/bWZzCb/aH4zGbQCuipqa/utQN+Tuz5rZM4TmpLWEJqq0W4FfmdkWd3+rmV0N/CHa/gPu3tOzMCqAn5tZWTTfpw61TCI90SWpIiLSRc1HIiLSRc1HMuKZ2c3AG7uNvsndvxdHeUTymZqPRESki5qPRESki0JBRES6KBRERKSLQkFERLr8f8gcOOsHBvsjAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "cvresult = pd.DataFrame.from_csv('1_nestimators.csv')\n",
    "        \n",
    "# plot\n",
    "test_means = cvresult['test-mlogloss-mean']\n",
    "test_stds = cvresult['test-mlogloss-std'] \n",
    "        \n",
    "train_means = cvresult['train-mlogloss-mean']\n",
    "train_stds = cvresult['train-mlogloss-std'] \n",
    "\n",
    "x_axis = range(0, cvresult.shape[0])\n",
    "        \n",
    "pyplot.errorbar(x_axis, test_means, yerr=test_stds ,label='Test')\n",
    "pyplot.errorbar(x_axis, train_means, yerr=train_stds ,label='Train')\n",
    "pyplot.title(\"XGBoost n_estimators vs Log Loss\")\n",
    "pyplot.xlabel( 'n_estimators' )\n",
    "pyplot.ylabel( 'Log Loss' )\n",
    "pyplot.savefig( 'n_estimators4_1.png' )\n",
    "\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': range(4, 10, 2)}"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#第一轮参数调整得到的n_estimators最优值（263），其余参数继续默认值,获取最优max_depth\n",
    "#max_depth 建议3-10， min_child_weight=1／sqrt(ratio_rare_event) =5.5\n",
    "max_depth = range(4,10,2)\n",
    "#min_child_weight = range(1,6,2)\n",
    "#param_test2_1 = dict(max_depth=max_depth, min_child_weight=min_child_weight)\n",
    "param_test2_1 = dict(max_depth=max_depth)\n",
    "param_test2_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.59176, std: 0.00261, params: {'max_depth': 4},\n",
       "  mean: -0.58809, std: 0.00293, params: {'max_depth': 6},\n",
       "  mean: -0.60215, std: 0.00263, params: {'max_depth': 8}],\n",
       " {'max_depth': 6},\n",
       " -0.5880918664286527)"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "xgb2_1 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=263,  #第一轮参数调整得到的n_estimators最优值\n",
    "        max_depth=6,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch2_1 = GridSearchCV(xgb2_1, param_grid = param_test2_1, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch2_1.fit(X_train , y_train)\n",
    "\n",
    "gsearch2_1.grid_scores_, gsearch2_1.best_params_,     gsearch2_1.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'min_child_weight': range(1, 10)}"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#目前已获得两个参数n_estimators=263和max_depth=6，其余参数继续默认值\n",
    "min_child_weight = range(1,10,1)\n",
    "#param_test2_1 = dict(max_depth=max_depth, min_child_weight=min_child_weight)\n",
    "param_test3 = dict(min_child_weight=min_child_weight)\n",
    "param_test3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58809, std: 0.00293, params: {'min_child_weight': 1},\n",
       "  mean: -0.58837, std: 0.00217, params: {'min_child_weight': 2},\n",
       "  mean: -0.58784, std: 0.00162, params: {'min_child_weight': 3},\n",
       "  mean: -0.58878, std: 0.00135, params: {'min_child_weight': 4},\n",
       "  mean: -0.58858, std: 0.00175, params: {'min_child_weight': 5},\n",
       "  mean: -0.58868, std: 0.00161, params: {'min_child_weight': 6},\n",
       "  mean: -0.58842, std: 0.00192, params: {'min_child_weight': 7},\n",
       "  mean: -0.58831, std: 0.00225, params: {'min_child_weight': 8},\n",
       "  mean: -0.58870, std: 0.00207, params: {'min_child_weight': 9}],\n",
       " {'min_child_weight': 3},\n",
       " -0.5878422087902345)"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb3 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=263,  #第一轮参数调整得到的n_estimators最优值\n",
    "        max_depth=6,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch3= GridSearchCV(xgb3, param_grid = param_test3, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch3.fit(X_train , y_train)\n",
    "\n",
    "gsearch3.grid_scores_, gsearch3.best_params_,     gsearch3.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reg_alpha': [0.1, 1, 2]}"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#最佳min_child_weight=3\n",
    "reg_alpha = [ 0.1, 1, 2]    #default = 0, 测试0.1,1，1.5，2\n",
    "param_test4 = dict(reg_alpha=reg_alpha)\n",
    "param_test4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58893, std: 0.00185, params: {'reg_alpha': 0.1},\n",
       "  mean: -0.58806, std: 0.00224, params: {'reg_alpha': 1},\n",
       "  mean: -0.58848, std: 0.00192, params: {'reg_alpha': 2}],\n",
       " {'reg_alpha': 1},\n",
       " -0.5880646733408024)"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb4 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=263,  #第二轮参数调整得到的n_estimators最优值\n",
    "        max_depth=6,\n",
    "        min_child_weight=3,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch4 = GridSearchCV(xgb4, param_grid = param_test4, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch4.fit(X_train , y_train)\n",
    "\n",
    "gsearch4.grid_scores_, gsearch4.best_params_,     gsearch4.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reg_lambda': [0.1, 1, 2, 3, 4]}"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#到目前为止获得了以下最优参数\n",
    "#n_estimators=263\n",
    "#max_depth=6\n",
    "#min_child_weight=3\n",
    "#reg_alpha=1\n",
    "reg_lambda = [0.1, 1, 2, 3, 4]      #default = 1，测试0.1， 0.5， 1，2\n",
    "param_test5 = dict(reg_lambda=reg_lambda)\n",
    "param_test5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58861, std: 0.00263, params: {'reg_lambda': 0.1},\n",
       "  mean: -0.58806, std: 0.00224, params: {'reg_lambda': 1},\n",
       "  mean: -0.58861, std: 0.00187, params: {'reg_lambda': 2},\n",
       "  mean: -0.58850, std: 0.00265, params: {'reg_lambda': 3},\n",
       "  mean: -0.58822, std: 0.00202, params: {'reg_lambda': 4}],\n",
       " {'reg_lambda': 1},\n",
       " -0.5880646733408024)"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb5 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=263,  #第二轮参数调整得到的n_estimators最优值\n",
    "        max_depth=6,\n",
    "        min_child_weight=3,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        reg_alpha = 1,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch5 = GridSearchCV(xgb5, param_grid = param_test5, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch5.fit(X_train , y_train)\n",
    "\n",
    "gsearch5.grid_scores_, gsearch5.best_params_,     gsearch5.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#到目前为止获得了以下最优参数\n",
    "#n_estimators=263\n",
    "#max_depth=6\n",
    "#min_child_weight=3\n",
    "#reg_alpha=1\n",
    "#reg_lambda=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "#使用xgboost内嵌交叉验证\n",
    "def modelfit_1(alg, X_train, y_train, nfold=5, early_stopping_rounds=10):\n",
    "    xgb_param = alg.get_xgb_params()\n",
    "    xgb_param['num_class'] = 3\n",
    "    \n",
    "    #直接调用xgboost，而非sklarn的wrapper类\n",
    "    xgtrain = xgb.DMatrix(X_train, label = y_train)\n",
    "        \n",
    "    cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=5,\n",
    "             metrics='mlogloss', early_stopping_rounds=early_stopping_rounds)\n",
    "  \n",
    "    cvresult.to_csv('6_nestimators.csv', index_label = 'n_estimators')\n",
    "    \n",
    "    #最佳参数n_estimators\n",
    "    n_estimators = cvresult.shape[0]\n",
    "    \n",
    "    # 采用交叉验证得到的最佳参数n_estimators，训练模型\n",
    "    alg.set_params(n_estimators = n_estimators)\n",
    "    alg.fit(X_train, y_train, eval_metric='mlogloss')\n",
    "        \n",
    "    #Predict training set:\n",
    "    train_predprob = alg.predict_proba(X_train)\n",
    "    logloss = log_loss(y_train, train_predprob)\n",
    "\n",
    "   #Print model report:\n",
    "    print('logloss of train is:', logloss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of train is: 0.47483957907478325\n"
     ]
    }
   ],
   "source": [
    "xgb_last = XGBClassifier(\n",
    "        learning_rate =0.02,\n",
    "        n_estimators=2000,  #数值大没关系，cv会自动返回合适的n_estimators\n",
    "        max_depth=6,\n",
    "        min_child_weight=3,\n",
    "        gamma=0,\n",
    "        subsample = 0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel=0.7,\n",
    "        reg_alpha = 1,\n",
    "        reg_lambda = 1,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "modelfit_1(xgb_last, X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'base_score': 0.5,\n",
       " 'booster': 'gbtree',\n",
       " 'colsample_bylevel': 0.7,\n",
       " 'colsample_bytree': 0.8,\n",
       " 'gamma': 0,\n",
       " 'learning_rate': 0.02,\n",
       " 'max_delta_step': 0,\n",
       " 'max_depth': 6,\n",
       " 'min_child_weight': 3,\n",
       " 'missing': None,\n",
       " 'n_estimators': 1189,\n",
       " 'nthread': 1,\n",
       " 'objective': 'multi:softprob',\n",
       " 'reg_alpha': 1,\n",
       " 'reg_lambda': 1,\n",
       " 'scale_pos_weight': 1,\n",
       " 'seed': 3,\n",
       " 'silent': 1,\n",
       " 'subsample': 0.5}"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_last.get_xgb_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: from_csv is deprecated. Please use read_csv(...) instead. Note that some of the default arguments are different, so please refer to the documentation for from_csv when changing your function calls\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xl8XFX9//HXJ5Oka9K0TVratKUrS1kELKsI/SLIouyCFEHBBcUvLvh1AeWHyFcEd1AQRARcWERALYuiXwRBxdpWttJSKG1p05buO92SfH5/nJN0mk6SSZvJncm8n4/HfeQuZ+793Lnt/cw55y7m7oiIiACUJB2AiIjkDyUFERFppqQgIiLNlBRERKSZkoKIiDRTUhARkWZKCiJpzOyrZnZH0nGIJEVJocCYWV8zm29m56fNqzCzBWb2gbR5E8zsUTNbbWZrzGymmV1nZv3j8ovMrMHMNsRhrpldmuPYJ5pZXS630RGZ4nH3b7n7x3O0vflmdnwu1p0LXXW8Cu176e6UFAqMu28ALgFuMrOaOPs7wDR3fxDAzI4Cngb+Aezj7lXASUA98I601T3n7n3dvS/wAeA7ZnZw1+yJdISZlSYdgxQJd9dQgANwN3AfMBFYCQxJW/Z34MftfP4i4O8t5v0bOD9t+jTgFWANIcnsm7Zs3zhvTSxzWtqyU4CZwHpgEfBFoA+wCWgENsRhaCv7dQvwWPz8FGBMFt/HPsBfgFXAbODcXYkHuAb4dfzcSMCBi4GFwGrgU8ChwEtx329O284Y4K/xeKwA7gGq4rJfxW1titv6chbf8XzgK3FbW4DSOL0o7sts4D0ZvosjgLeAVNq8M4GX4vhhwDRgHbAU+EEr3+lEoK6VZf2AXwLLgTeBq4CSuCwFfD9+B/OAy+L3WNrKuuYDx7ey7BPAnHhcJzf9mwEM+CGwDFgbv6P9WzveSf9/LaQh8QA07OKBg/7Akvgf7+K0+X2ABmBiO5+/iLSkEE90a4C94vRewEbgBKAM+HL8z1kep+cAX43Tx8X/gHvHzy4B3p0W5yFxvNWTTFocd8cTwGHxJHgPcH87n+lDOGlfHD9zSPxe9utoPGROCrcBPYH3ApuB3wODgNp4Ujo2lh8bv68eQA3wDHBj2rp3OPm19R2nlX8BGA70AvaO+zk0Lb6MCRN4Azghbfq3wBVx/DngwjjeFziilXW0erwICeEPQEWM4zXgY3HZpwgn5WHx+/4/diEpxH9XK+Lx7AH8GHgmLjsRmA5UERLEvsQfRq0dbw3ZDWo+KlDuvprwC7M38HDaov6EZsG3mmaY2Xdiv8JGM7sqrewRcf4GQi3hV8DrcdkHgcfc/S/uvg34HuHEdBThl2hf4AZ33+rufwUeBSbFz24DxptZpbuvdvf/dHD3Hnb3f7t7PSEpHNRO+fcD8939Lnevj9t7iNAk1hnx/K+7b3b3PxNO4ve5+zJ3XwQ8CxwM4O5z4ve1xd2XAz8Ajm1jvW19x01+5O4L3X0TIdn3iPtS5u7z3f2NVtZ9H/F4mFkF4dfzfWnfx1gzq3b3De7+r458GWaWirFf6e7r3X0+oWZwYSxyLnCTu9fFf6c3dGT9aT4E3Onu/3H3LcCVwJFmNjLuQwWhhmjuPsvdl6Tt3+4c76KmpFCgzOwCwi+0/wO+nbZoNaGZYkjTDHf/sod+hd8Rfkk3+Ze7V3noU9gD2A/4Vlw2lNAs0LSORsKv1Nq4bGGc1+TNuAzgbMJJ6E0z+5uZHdnB3XsrbfxtQgJqy57A4THBrTGzNYQTyh6dFM/StPFNGab7ApjZIDO738wWmdk64NdAdRvrbes7brIwbfkc4POE2syyuK2hraz7XuAsM+sBnAX8x92btvUxQi3lVTObambvbyPGTKoJNcQ30+alH/+h6XG3GO+Ilt/PBkLTXG38IXIzoalxqZndbmaVsejuHu+ipqRQgMxsEKE99RPA
J4FzzewYAHffSGiHP6sj63T3pYRf16fGWYsJJ9umbRqhGWNRXDbczNL//YyIy3D3qe5+OqGJ5ffAA02b6UhMHbAQ+FtMcE1DX3e/tIvjuT6u80B3rwQuIDRtNGm5vba+44yfcfd73f3o+Dlnxx8E6eVmEk6oJwPnE5JE07LX3X0S4fv4NvCgmfXJfjdZQfg1vmfavObjT2i+GZa2bHgH1p2u5ffTBxjI9n9nP3L3dxJ+zOwFfCnOb+14SxaUFArTzcDv3f2pWGX+MvCz+KuQOP1RM7siJhDMbBgwqrUVmtlAQmfkK3HWA8D7zOw9ZlYG/A+hs/OfhKSzEfiymZWZ2URCMrnfzMrN7ENm1i82iawjNHtA+IU90Mz6ddL30ORRYC8zuzDGU2Zmh5rZvl0cTwWhE3mNmdUST1JplgKj06bb+o53YmZ7m9lx8ThvJtRSGjKVje4FPgscQ+hTaFrPBWZWE2sma+LsVtdjZj3TB0JN9AHgung59J7AFwg1o6b9+pyZ1ZpZFaFzvD1lLbZTGuO/2MwOivv8LWCKu8+Px/fw+L1tjN9HQzvHW7KRdKeGho4NwBmEX1BVLeY/CVyXNn048DjhP/0aYAZwHTAwLr+I8J+l6cqbZYQ250Fp6ziT0GG4FvgbseM2Ltsvzlsby5wZ55cDfyI0Y60DpgJHp33uTkITwBpav/rom2nTE2mnczqW25twxdLyuP6/EvoiOhQPmTuaS9PK15HWiU84EV6V9p1Mj9/nC4STfF1a2dOBBXFbX8ziO57Pjh3TBxL6ftYTOuMfzfQdppUfQTiBP9Zi/q/j8d5A+BFwRiufnxj3v+UwltB39ev4fS8Ermb71UelhJrsSsLVR5cTahbWynbmZ9jGN+OyTxE6zZv2d1ic/x7CFUcb2H6lV9/2jreG9geLX7CISE6Y2cnAbe6+Z7uFJXFqPhKRTmVmvczsFDMrjc1oXydc5CAFQDUFKQhm9m7gj5mWebh6SvKEmfUmNIXtQ+j3eAz4nLuvSzQwyYqSgoiINFPzkYiINCu4h2xVV1f7yJEjkw5DRKSgTJ8+fYW717RXruCSwsiRI5k2bVrSYYiIFBQze7P9Umo+EhGRNDlLCmZ2p5ktM7MZrSzfx8yeM7MtZvbFXMUhIiLZy2VN4W7Ci11as4pwC/73chiDiIh0QM6Sgrs/Qzjxt7Z8mbtPJdz+LiIieaAg+hTM7BIzm2Zm05YvX550OCIi3VZBJAV3v93dJ7j7hJqadq+oEhGRXVQQSUFERLpG0SSFxQve4J+Tf876dauTDkVEJG/l7OY1M7uP8Dz2ajOrIzwpsQzA3W8zsz2AaUAl0GhmnwfG5+qhWW+98gxH/ecLzBuzPxX7HZ6LTYiIFLycJQUPr/tra/lb7PjKvpwqrxgIwNtrV3bVJkVECk7RNB/17hc6qLes09VLIiKtKZqk0Lf/IAC2bWj11gkRkaJXNEmhsn+oKTS+reYjEZHWFE1S6Nm7gi1eBpt09ZGISGuKJilgxjqroERJQUSkVcWTFIANJRWUbV2bdBgiInmrqJLCptJ+9Ni2JukwRETyVlElha1llfRuWJ90GCIieauoksK2Hv3p25iTG6ZFRLqFokoKjT2rqPQNeGNj0qGIiOSlokoK1nsAPWwb6zeotiAikklRJYWS3gMAWL9Kj7oQEcmkqJLCY3M2A7Bx7bKEIxERyU9FlRTOO/ZAADbpSakiIhkVVVLo3S88FG/rejUfiYhkUlRJQU9KFRFpW1ElhcqYFBo3qvlIRCSTokoKqfKebKA39vaKpEMREclLRZUUANaW9KNss2oKIiKZFF1S2NhYTs8NC5MOQ0QkLxVdUtjUZxh9SrYlHYaISF4quqRQ33MAFY16p4KISCY5SwpmdqeZLTOzGa0sNzP7kZnNMbOXzOyQXMWSrnHdW/T3dWzd1tAVmxMRKSi5rCncDZzUxvKTgXFxuAS4NYexNLMxEymzBtas1g1sIiIt5SwpuPszQFt3
iZ0O/NKDfwFVZjYkV/E0KascDMDaFYtzvSkRkYKTZJ9CLZB+GVBdnLcTM7vEzKaZ2bTly3fvF36v2X8AYOOqpbu1HhGR7ijJpGAZ5nmmgu5+u7tPcPcJNTU1u7XRPidfDcCWtW/t1npERLqjJJNCHTA8bXoYkPM2ncqBewCwbZ36FEREWkoyKUwGPhyvQjoCWOvuS3K90b79Q59C49ync70pEZGCU5qrFZvZfcBEoNrM6oCvA2UA7n4b8DhwCjAHeBu4OFex7BBXaQ/W0YdUxeCu2JyISEHJWVJw90ntLHfgv3O1/bas9ErWr1KfgohIS0V3RzPAVuvBUNcrOUVEWirKpLClciSV9nbSYYiI5J2iTAqvbuzDAF9NQ2PGK2BFRIpWUSaFsWPGUmmbWLl6ddKhiIjklaJMCuX9wtM0Vi/VexVERNIVZVK479XwPoX1y5UURETSFWVSuPyMdwOwebUeiicikq4ok0LVoPB0jWkzZiUciYhIfinKpFDadyBbvZSDXElBRCRdUSYFzFidGkB5zz5JRyIikleKMykAbzX2gw16p4KISLqiTQqpyiEMtjVJhyEikleKNinMjnc11zc0Jh2KiEjeKNqkMKZ0Of1tAyvWrEs6FBGRvFG0SaH8gLMAWL1MN7CJiDQp2qTQu7oWgNse+UfCkYiI5I+iTQr99xgJwOljLNlARETySNEmhcrBIwGYMfOVZAMREckjRZsUrFcVG7wn+zfOTjoUEZG8UbRJAeAtq6GhsSHpMERE8kZRJ4WGknJq9a5mEZFmRZ0UZtkYBtlqNm9TbUFEBIo8KYwbty/Vto4lK1YlHYqISF7IaVIws5PMbLaZzTGzKzIs39PMnjSzl8zsaTMblst4Wuo5cAQA37z3/7pysyIieStnScHMUsAtwMnAeGCSmY1vUex7wC/d/UDgWuD6XMWTSUW8LLVqm56WKiICua0pHAbMcfe57r4VuB84vUWZ8cCTcfypDMtzqv/QMQAMqFdns4gI5DYp1ALpDxaqi/PSvQicHcfPBCrMbGDLFZnZJWY2zcymLV++vNMCLKuqpdGN6sbOW6eISCHLZVLI9PwIbzH9ReBYM3seOBZYBNTv9CH32919grtPqKmp6bwIS8tZQRU1SgoiIgCU5nDddcDwtOlhwOL0Au6+GDgLwMz6Ame7+9ocxrSTpVbN4MYVXblJEZG8lcuawlRgnJmNMrNy4DxgcnoBM6s2s6YYrgTuzGE8GS0rqaGWZWyp170KIiI5SwruXg9cBjwBzAIecPdXzOxaMzstFpsIzDaz14DBwHW5iqc1S0uHUmsrqFu5vqs3LSKSd3J6n4K7P+7ue7n7GHe/Ls672t0nx/EH3X1cLPNxd9+Sy3gyOerQQymzBr7x6ye6etMiInmnqO9oBuhfuxcA1VuXJByJiEjyij4pVA4dB8Cg+sXtlBQR6f6KPilYZS1bKePAPquTDkVEJHFFnxQoKWERgyhdMz/pSEREEqekADRWjWQYS6lvaEw6FBGRRCkpAC9s7M8IW8ri1ZuSDkVEJFFKCsA7y+voa5v54i/0CG0RKW5KCkD/934JgOqtugJJRIqbkgJQOTTcq3D4pr8lHImISLKUFAAbMIp6UlRWVCYdiohIopQUAFJlLGAIfda/kXQkIiKJUlKItnqK0Sxm5YYuf/ySiEjeUFKInu95GHvaUua8pTubRaR4KSlEJ/d6lTJr4JaH/px0KCIiiWk3KZjZGDPrEccnmtlnzawq96F1rX7n/BiAIdsWJByJiEhysqkpPAQ0mNlY4OfAKODenEaVAKsOT0sdVr8w4UhERJKTTVJojG9ROxO40d0vB4bkNqwE9OjLSq9gVMN83D3paEREEpFNUthmZpOAjwCPxnlluQspOW+kxjDClrJsva5AEpHilE1SuBg4ErjO3eeZ2Sjg17kNKxl1ZXsyzhbxkTv+mXQoIiKJKG2vgLvPBD4LYGb9gQp3vyHXgSXh5N6z6bVlKwM21yUd
iohIIrK5+uhpM6s0swHAi8BdZvaD3IfW9Xqd+zMAhm/Tnc0iUpyyaT7q5+7rgLOAu9z9ncDxuQ0rITV7s40UoxrmJR2JiEgiskkKpWY2BDiX7R3NWTGzk8xstpnNMbMrMiwfYWZPmdnzZvaSmZ3SkfV3utIerPB+7O3zWbtpW6KhiIgkIZukcC3wBPCGu081s9HA6+19yMxSwC3AycB4YJKZjW9R7CrgAXc/GDgP+ElHgs+Ft603+5YsYObidUmHIiLS5dpNCu7+W3c/0N0vjdNz3f3sLNZ9GDAnlt8K3A+c3nL1QNPzqvsBib/lZvCxH2cPW823Hnwm6VBERLpcNh3Nw8zsd2a2zMyWmtlDZjYsi3XXAum3B9fFeemuAS4wszrgceAzrcRwiZlNM7Npy5cvz2LTu67viIMAqN2szmYRKT7ZNB/dBUwGhhJO6o/Eee2xDPNa3io8Cbjb3YcBpwC/MrOdYnL32919grtPqKmpyWLTu2GPAwHYd+uM3G5HRCQPZZMUatz9Lnevj8PdQDZn5jpgeNr0MHZuHvoY8ACAuz8H9ASqs1h37vQZyGrvy7iSRSxZuynRUEREulo2SWGFmV1gZqk4XACszOJzU4FxZjbKzMoJHcmTW5RZALwHwMz2JSSF3LYPZeGlHofwjpI3uPDn/046FBGRLpVNUvgo4XLUt4AlwAcIj75oU3yI3mWEK5dmEa4yesXMrjWz02Kx/wE+YWYvAvcBF3kePI3uXceeSK2t5O2VurNZRIpLNo+5WACclj7PzD4P3JjFZx8ndCCnz7s6bXwm8K5sg+0qpS/9BoD9mZNwJCIiXWtX37z2hU6NIt98/M80eAkH2+ts3taQdDQiIl1mV5NCpiuLuo+yXqy2Cg6wuZxxyz+SjkZEpMvsalJIvN0/1/r16cM7St5g/nLd2SwixaPVPgUzW0/mk78BvXIWUZ4o61VB2cbFjG3Uw/FEpHi0WlNw9wp3r8wwVLh7ux3UBe/DfwDgyJKZrN+sh+OJSHHY1eaj7q9yCGu9N4eVzObsW/UmNhEpDkoKbejTt5JDS15l/or1SYciItIllBTaUNqzkirbyFhfQB7cUyciknNKCm258GEADreZnH6zLk0Vke4vm0dnrzezdS2GhfFx2qO7IsjEVA1nW6o3R5fMYPZSNSGJSPeXzVVEPyA83fRewuWo5wF7ALOBO4GJuQouH5T17sdR9a/Als1JhyIiknPZNB+d5O4/dff17r7O3W8HTnH33wD9cxxf8k69iV62lcNKZnH2T9SEJCLdWzZJodHMzjWzkjicm7as+/e+jjyaBox3l8xght7bLCLdXDZJ4UPAhcCyOFxIeIVmL8Kjsbu38j6U9OzH8SXT2VLfmHQ0IiI51W5ScPe57n6qu1fH4VR3n+Pum9z9710RZNKsV39Gl7zFcFvKWWpCEpFuLJurj4bFK42WmdlSM3vIzIZ1RXB548O/B+Dkkn/zipqQRKQby6b56C7CazSHArXAI3Fe8eg/Esr7cHrqn2pCEpFuLZukUOPud7l7fRzuBmpyHFf+6V3NfiVvsgcrOfXHRdFqJiJFKJuksMLMLjCzVBwuAFbmOrC8c8FDAJySmsKMRWsTDkZEJDeySQofBc4F3gKWAB8ALs5lUHmpehxYCeelnsKBc/TkVBHphrK5+miBu5/m7jXuPsjdzwDO6oLY8k+/4exVsojhtpSXF6u2ICLdz64+EO8LnRpFobj4cRy4KPUEm7c16smpItLt7GpSsE6NolD0G4b1rOLs1LMYjYy/+k9JRyQi0ql2NSlk9RPZzE4ys9lmNsfMrsiw/Idm9kIcXjOzNbsYT9fpO4gq28gRJbPYtE2Xp4pI99JqUmjlkdnrzGw94Z6FNplZCrgFOBkYD0wys/HpZdz9cnc/yN0PAn4MPLxbe9MVPvkMWIovlf4GQLUFEelWWn10trtX7Oa6DwPmuPtcADO7HzgdmNlK+UnA13dzm7lX1gtKUhzsc6hh
Dcu3ViUdkYhIp8nlm9dqgYVp03Vx3k7MbE9gFPDXVpZfYmbTzGza8uXLOz3QDrv0OQz4VtkdAOyr2oKIdBO5TAqZOqNb64s4D3jQ3RsyLXT32919grtPqKnJg5upq8eCpTiu5HnKqGfT1gZdiSQi3UIuk0IdMDxtehjhDW6ZnAfcl8NYOl/N3qTM+X7pTwAYc+XjCQckIrL7cpkUpgLjzGyUmZUTTvyTWxYys70Jb3B7LoexdL5L/wlWwvtKp5CigUbgbN3lLCIFLmdJwd3rCS/heQKYBTzg7q+Y2bVmdlpa0UnA/V5o7S9mUL0XKZxflN0AwPQ3V/PBnxZWbhMRSdfq1Uedwd0fBx5vMe/qFtPX5DKGnLr0OfjfgRyVmolta8QpYcq8VUlHJSKyy3LZfNT9lZTAwHGU4DzV56vNs0df8ViCQYmI7Dolhd316X+BlTCyYSF92AxAI7D/13WZqogUHiWF3VVSAoP2B5xXBm+/927Dlgb1L4hIwVFS6AyXPgslpbB2IacO29Q8e8q8VUoMIlJQlBQ6y+dfBuDHKy+hosf2+/aUGESkkCgpdJbKoTBgLHgDLw+8aocvVolBRAqFkkJn+sw0sBSseoO5VxywU2I44JonEgtNRCQbSgqdyQw++58wfuMBzP3mCTssXr+5XolBRPKakkJn6z8SqvcJ49fXMv+G9+2weP3mekbqPgYRyVNKCrlw2RRIlUPDVrj50J0SA8DIKx5TrUFE8o6SQq5cuSj8XfEazHuW+Te8j1SLh4mr1iAi+UZJIVdKy+Er88P4L94Py2bxxvXvo6Lnzo+bGnnFY4y5UslBRJJnhfZw0gkTJvi0adOSDiN7axbAjQeE8c+9BP33BGi1hlDRs5SXrzmxq6ITkSJhZtPdfUJ75VRTyLWqEfDJZ8L4TQfCyjcAmH9D5lpDU5OS+htEJAmqKXSVJS/CT48J4/89FWr2al7UVr9CyuCN63fuqBYR6YhsawpKCl1p6Uy49cgw/tE/w4jDmxd98KfPtfsuBiUIEdlVSgr5asUcuPmdYXzgXvCZqTssPuCaJ1i/ub7d1ajvQUQ6Qkkhn729Cr4bnpNEqhy+ugRSO/YvZFNzaKIahIi0R0kh39VvhW8NhcZtYfpzL4a7oTPItvaQLtMNcyJSvJQUCsXNh8GK2WH8rDvgwHPaLL6rN7spSYgUNyWFQrL6zXC5KoSX9XzhVehb0+7HdvduaPVLiBQPJYVC01APPzoI1i4M0+//IRxyUXjdZ5Y665EZqlWIdD95kRTM7CTgJiAF3OHuN2Qocy5wDeDAi+5+flvr7LZJocmyV+En2y9V5RNPQe0hu7SqXDxXSbULkcKUeFIwsxTwGnACUAdMBSa5+8y0MuOAB4Dj3H21mQ1y92VtrbfbJwUA99DXsPK17fM+/S8YtO9urbYjVzTtKl0JJZKf8iEpHAlc4+4nxukrAdz9+rQy3wFec/c7sl1vUSSFJpvXwq1Hw9oFYbqkDP57Cgwc06mbGXPlYzQk0IqoWodI18mHpPAB4CR3/3icvhA43N0vSyvze0Jt4l2EJqZr3P1PGdZ1CXAJwIgRI9755ptv5iTmvPX2Krjt3bCuLkyXlMGnnt3tmkM28u3R3kokIrsmH5LCOcCJLZLCYe7+mbQyjwLbgHOBYcCzwP7uvqa19RZVTaGl9Uvh9omwfnGYthRMuh/GHt+hDunOkm8JoyOUXKTYZJsUdn5MZ+epA4anTQ8DFmco8y933wbMM7PZwDhC/4O0VDEY/mcWbFwZksPaBXBvvK/hxG/BQedDr/5dFk42Vynla+LI9QuODh81gN988sicrV8kV3JZUyglNA29B1hEONGf7+6vpJU5idD5/BEzqwaeBw5y95Wtrbeoawot1W+FWZPhoY9tn1dSBhc+DHsenUjtYXck1bfR3alWJJAHzUcxiFOAGwn9BXe6+3Vmdi0wzd0nm5kB3wdOAhqA69z9/rbWqaTQiiUvwT3nwoYl2+cd
dxW843zoV5tcXF1AyUTao5pbniSFXFBSaMe2TTBzMvzukh3nv/ebsO9pzW9+kx11xeW6Irurb48UM75x0i59VklBYNVcmPEQ/PWbO84/7v/B+NOhelwycRWhXXmooUhL5SnjtetO2aXPKinIjlbNg1+eAWvm7zj/2K+EGsTg/cAskdAkeWqCKwy9ykqY9b8n79JnlRSkdWsXwauPwh+/vOP8vnvAqTfCqGOgvE8ysYkUsKYr2lJGTpLs7lw0oKQg2dmwDGY/Do98budlJ14f7oGoHqdahEiBU1KQjqvfAgueg3vOgYatOy7rMwhOuBZGHwuVQ5OJT0R2mZKC7L5V82DuU/D4l6CxRSfpoZ8ICWLk0V16w5yI7BolBelcjY2wdAbM+xv8+aqdl/cZDMd9DYYfDtV7FdyNcyLdnZKC5Fb9Vlg0Dd78Bzx9w841iZ5VcMSlMOxQGDYBevZLJk4RAZQUpKu5w8o5sPDfodO6cduOy60EDr4Qhh8Gww6DgWNVmxDpQkoKkrzN62DRdJj82e3vhEjXsz8c+emQKGrfCT0quj5GkSKhpCD5p7ERVr4eahOPXr5zbQLCVU7HfBGGHAR7HADlvbs+TpFuSElBCsOmNaFvYuFU+NtOr/AO+tTAUZ+BwfuHRNF3UNfGKNINKClIYXKHdYthyYuw5AV49vs7d2JD6Mg+5MMhSQzeP9xglyrr+nhFCoSSgnQvb68Kl8S+NQP+cnXmpicrgQPPgz32316r6D2g62MVyUNKCtL9NWyDFa+HZLF0Bvzjpszlxp6wY6IYMAZSuXzpoEj+UVKQ4rVhOSx9OdQqnrw2c60CwiWyg/cPCaNmX+gzsGvjFOlCSgoi6eq3worZIVEsnQHP3Zy5nKXg0I9BzT4waN/wV01Q0g0oKYi0xx02LA2JYvks+MePYOOyzGUtBe/8CAwcFx7jUT0W+o3QDXhSMJQURHZVYyOsq4Nlr4ZksexVePHe1suPPyMminFhGDgOevTtunhFsqCkINLZ3GHjCljxWrgJb8Xr8MI9sGl15vKjjo2JYq/wWI/qvaCyVrULSYSSgkhXqt8S3om94vWQNKb8tI2mqBLY78zYFJWWNHT3tuSQkoJIPnAPb7dLr1386yetl+9ZBQfAvU2RAAAMHElEQVSeCwNGQ/9R8e+eUNqj62KWbikvkoKZnQTcBKSAO9z9hhbLLwK+CyyKs2529zvaWqeSgnQb2zbF2sVrsGJO+PvyA62XH/lu6D8SBoyKCSP+7VXVZSFL4Uo8KZhZCngNOAGoA6YCk9x9ZlqZi4AJ7n5ZtutVUpBur6nvYvW88Pa71fNg6h2wcXnrnynvC3udtHPCqNhD79cWIPukkMvbOg8D5rj73BjQ/cDpwMw2PyVS7Mygb00Yhh8W5k28YvvyLRtg9fxQy2hKHNPvghkPtrK+kpgwRu9Y06gaoedFyU5ymRRqgYVp03XA4RnKnW1mxxBqFZe7+8IMZUSkSY++4S7sPfbfPu/UG8Pfhm2wZkFaLWN+uFFv9uOtr2/0xO21i6a+jP4jdVltkcplUshUZ23ZVvUIcJ+7bzGzTwG/AI7baUVmlwCXAIwYMaKz4xTpPlJlMHBMGJqceF346w7r39qxWervP4S5TwNPZ16fpeCAc0LCqBqxfagYqudHdVO57FM4ErjG3U+M01cCuPv1rZRPAavcvc2X+apPQSRHNq/dnixWzQ3jL9wL3tD6ZywFB35wx4RRNTwkjdLyrotd2pUPfQpTgXFmNopwddF5wPnpBcxsiLsviZOnAbNyGI+ItKVnPxh6UBianB6fEVW/NdzlvWbBjsNLv2n7bm9LwfjTod8w6Dc8JIx+w8LQs0qd4HkoZ0nB3evN7DLgCcIlqXe6+ytmdi0wzd0nA581s9OAemAVcFGu4hGR3VBaHvobBozecf5Zt4e/zUljYUgWa+vC8Ooj8MrDba977PEhYTQljqakUTlUHeEJ0M1rIpJ7jY3w9gpYuzAkizXx
79SfZX6zXrrhh29PFP2G75g4evZTbSNLid+nkCtKCiLd1Na3Yd2i7YmjKXm01TwF4ZLbscdnThwVQ9QhHuVDn4KISPbKe29/0my6M28Nfxsbww18axfumDim3Aav/7ntdQ8/IiSJqgzNVD3bvLal6KimICLdx9aNsHYRrE3r15j+i9YfTtjESsJrW9NrG00JpO8e3aK2oZqCiBSf8j5Qs1cYmhx31fbxxobwgMK1dTvWOF5+EF5/ou11jzhyxxpG83htt6ptKCmISPEoSUHlkDAMP3T7/FO+u318y/pY26hLa6ZaCLP/CAuea3v9o/8rJInK2nD1VGW8iqpyaMF0iispiIik61EBg/YJQyaNDeE1rmvjfRvrFsG6xeHvrEfaX//oiTFpNCWO+LdfbV7cu6GkICLSESWp7b/+mx5Y2FLDtvBIkXWLw/0b6xanJY5H275LHMJb+5qTxtDt921Ujch5U5WSgohIZ0uVhY7qquFkfg4o0FAfahyZEsfMyZkTR6oc/l8bj1DvBEoKIiJJSJWGJqN+tcChmcs0dYyvWxSG/iNzHpaSgohIvkrvGKfdq0k7Z5NdshURESkISgoiItJMSUFERJopKYiISDMlBRERaaakICIizZQURESkmZKCiIg0K7j3KZjZcuDNXfx4NbCiE8NJWnfaH+1LfupO+wLda386ui97untNe4UKLinsDjObls1LJgpFd9of7Ut+6k77At1rf3K1L2o+EhGRZkoKIiLSrNiSwu1JB9DJutP+aF/yU3faF+he+5OTfSmqPgUREWlbsdUURESkDUoKIiLSrGiSgpmdZGazzWyOmV2RdDztMbPhZvaUmc0ys1fM7HNx/gAz+4uZvR7/9o/zzcx+FPfvJTM7JNk92JmZpczseTN7NE6PMrMpcV9+Y2blcX6POD0nLh+ZZNwtmVmVmT1oZq/G43NkgR+Xy+O/sRlmdp+Z9SyUY2Nmd5rZMjObkTavw8fCzD4Sy79uZh/Jo335bvx39pKZ/c7MqtKWXRn3ZbaZnZg2f/fOde7e7QcgBbwBjAbKgReB8UnH1U7MQ4BD4ngF8BowHvgOcEWcfwXw7Th+CvBHwIAjgClJ70OGffoCcC/waJx+ADgvjt8GXBrHPw3cFsfPA36TdOwt9uMXwMfjeDlQVajHBagF5gG90o7JRYVybIBjgEOAGWnzOnQsgAHA3Pi3fxzvnyf78l6gNI5/O21fxsfzWA9gVDy/pTrjXJf4P8ou+rKPBJ5Im74SuDLpuDq4D38ATgBmA0PivCHA7Dj+U2BSWvnmcvkwAMOAJ4HjgEfjf8wVaf/gm48R8ARwZBwvjeUs6X2I8VTGk6i1mF+ox6UWWBhPiKXx2JxYSMcGGNniRNqhYwFMAn6aNn+HcknuS4tlZwL3xPEdzmFNx6UzznXF0nzU9A+/SV2cVxBiFf1gYAow2N2XAMS/g2KxfN/HG4EvA41xeiCwxt3r43R6vM37EpevjeXzwWhgOXBXbAq7w8z6UKDHxd0XAd8DFgBLCN/1dArz2DTp6LHI62OU5qOEmg7kcF+KJSlYhnkFcS2umfUFHgI+7+7r2iqaYV5e7KOZvR9Y5u7T02dnKOpZLEtaKaGKf6u7HwxsJDRRtCaf94XY3n46oQliKNAHODlD0UI4Nu1pLfa83ycz+xpQD9zTNCtDsU7Zl2JJCnXA8LTpYcDihGLJmpmVERLCPe7+cJy91MyGxOVDgGVxfj7v47uA08xsPnA/oQnpRqDKzEpjmfR4m/clLu8HrOrKgNtQB9S5+5Q4/SAhSRTicQE4Hpjn7svdfRvwMHAUhXlsmnT0WOT1MYod3+8HPuSxTYgc7kuxJIWpwLh4RUU5oYNscsIxtcnMDPg5MMvdf5C2aDLQdHXERwh9DU3zPxyvsDgCWNtUhU6au1/p7sPcfSThu/+ru38IeAr4QCzWcl+a9vEDsXxe/HJz97eAhWa2d5z1HmAmBXhcogXAEWbWO/6ba9qfgjs2
aTp6LJ4A3mtm/WPN6b1xXuLM7CTgK8Bp7v522qLJwHnxarBRwDjg33TGuS7JDqIu7sA5hXAFzxvA15KOJ4t4jyZU+14CXojDKYT22yeB1+PfAbG8AbfE/XsZmJD0PrSyXxPZfvXR6PgPeQ7wW6BHnN8zTs+Jy0cnHXeLfTgImBaPze8JV6wU7HEBvgG8CswAfkW4oqUgjg1wH6EvZBvhV/LHduVYENrr58Th4jzalzmEPoKmc8BtaeW/FvdlNnBy2vzdOtfpMRciItKsWJqPREQkC0oKIiLSTElBRESaKSmIiEgzJQUREWmmpCAiIs2UFESyYGYHmdkpadOn7dJjiTOv+/Nm1rsz1iWyu3SfgkgWzOwiws1Ol+Vg3fPjuld04DMpd2/o7FhEVFOQbsXMRlp48c3P4otj/mxmvVopO8bM/mRm083sWTPbJ84/J75w5kUzeyY+LuBa4INm9oKZfdDMLjKzm2P5u83sVgsvRZprZsfGF6bMMrO707Z3q5lNi3F9I877LOFBdE+Z2VNx3iQzeznG8O20z28ws2vNbApwpJndYGYz4wtYvpebb1SKTtK32GvQ0JkD4Xn09cBBcfoB4IJWyj4JjIvjhxOe4wPhEQi1cbwq/r0IuDnts83TwN2EB/0Z4Ymj64ADCD+6pqfF0vS4hRTwNHBgnJ4PVMfxoYTnEdUQnsj6V+CMuMyBc5vWRXi8gaXHqUHD7g6qKUh3NM/dX4jj0wmJYgfxkeRHAb81sxcIL1YZEhf/A7jbzD5BOIFn4xF3d0JCWeruL7t7I/BK2vbPNbP/AM8D+xHentXSocDTHp5a2vSo5GPisgbCU3MhJJ7NwB1mdhbw9k5rEtkFpe0XESk4W9LGG4BMzUclhBfJHNRygbt/yswOB94HvGBmO5VpY5uNLbbfCJTGJ1l+ETjU3VfHZqWeGdaT6Xn4TTZ77Edw93ozO4zwVNPzgMsIjyQX2S2qKUhR8vDConlmdg40v9T9HXF8jLtPcferCa+bHA6sJ7wre1dVEl7Is9bMBrPji2zS1z0FONbMqs0sRXhV5N9arizWdPq5++PA5wlPbhXZbaopSDH7EHCrmV0FlBH6BV4Evmtm4wi/2p+M8xYAV8Smpus7uiF3f9HMnic0J80lNFE1uR34o5ktcff/MrMrCe8zMOBxd//DzmukAviDmfWM5S7vaEwimeiSVBERaabmIxERaabmI+n2zOwWwnui093k7nclEY9IPlPzkYiINFPzkYiINFNSEBGRZkoKIiLSTElBRESa/X+U7W3JxN484wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load the saved cross-validation history from disk.\n",
    "# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;\n",
    "# read_csv with index_col=0 reads the first column as the index, as from_csv did.\n",
    "cvresult = pd.read_csv('6_nestimators.csv', index_col=0)\n",
    "\n",
    "# Per-row mean / std of the multi-class log loss columns\n",
    "test_means = cvresult['test-mlogloss-mean']\n",
    "test_stds = cvresult['test-mlogloss-std']\n",
    "\n",
    "train_means = cvresult['train-mlogloss-mean']\n",
    "train_stds = cvresult['train-mlogloss-std']\n",
    "\n",
    "# One x position per row of the CV history\n",
    "x_axis = range(0, cvresult.shape[0])\n",
    "\n",
    "# Error bars are drawn from the '-std' columns\n",
    "pyplot.errorbar(x_axis, test_means, yerr=test_stds, label='Test')\n",
    "pyplot.errorbar(x_axis, train_means, yerr=train_stds, label='Train')\n",
    "pyplot.title(\"XGBoost n_estimators vs Log Loss\")\n",
    "pyplot.xlabel('n_estimators')\n",
    "pyplot.ylabel('Log Loss')\n",
    "pyplot.legend()  # without this call the label= arguments above are never shown\n",
    "pyplot.savefig('n_estimators6.png')\n",
    "\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the trained model so it can be reloaded for testing\n",
    "import pickle\n",
    "\n",
    "# The context manager guarantees the file is flushed and closed after the dump\n",
    "with open(\"xgb_model.pkl\", 'wb') as model_file:\n",
    "    pickle.dump(xgb_last, model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of train is: 0.47483957907478325\n"
     ]
    }
   ],
   "source": [
    "# Reload the pickled model and report its log loss on the training data.\n",
    "# A separate name is used so the `xgb` alias of the xgboost module is not overwritten here.\n",
    "# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.\n",
    "with open(\"xgb_model.pkl\", 'rb') as model_file:\n",
    "    xgb_model = pickle.load(model_file)\n",
    "\n",
    "train_predprob = xgb_model.predict_proba(X_train)\n",
    "logloss = log_loss(y_train, train_predprob)\n",
    "\n",
    "# Print model report:\n",
    "print('logloss of train is:', logloss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>...</th>\n",
       "      <th>virtual</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2950</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>11</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>950.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>24</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3758</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3300</td>\n",
       "      <td>1650.000000</td>\n",
       "      <td>1100.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>11</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>4900</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 227 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  price  price_bathrooms  price_bedrooms  room_diff  \\\n",
       "0        1.0         1   2950      1475.000000     1475.000000        0.0   \n",
       "1        1.0         2   2850      1425.000000      950.000000       -1.0   \n",
       "2        1.0         1   3758      1879.000000     1879.000000        0.0   \n",
       "3        1.0         2   3300      1650.000000     1100.000000       -1.0   \n",
       "4        2.0         2   4900      1633.333333     1633.333333        0.0   \n",
       "\n",
       "   room_num  Year  Month  Day  ...   virtual  walk  walls  war  washer  water  \\\n",
       "0       2.0  2016      6   11  ...         0     0      0    0       0      0   \n",
       "1       3.0  2016      6   24  ...         0     0      0    1       0      0   \n",
       "2       2.0  2016      6    3  ...         0     0      0    0       0      0   \n",
       "3       3.0  2016      6   11  ...         0     0      0    0       0      0   \n",
       "4       4.0  2016      4   12  ...         0     0      0    1       0      0   \n",
       "\n",
       "   wheelchair  wifi  windows  work  \n",
       "0           0     0        0     0  \n",
       "1           0     0        0     0  \n",
       "2           0     0        0     0  \n",
       "3           1     0        0     0  \n",
       "4           0     0        0     0  \n",
       "\n",
       "[5 rows x 227 columns]"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the test data\n",
    "# NOTE(review): absolute, machine-specific path; consider a configurable data directory.\n",
    "# The with-block closes the file handle once pandas has finished parsing.\n",
    "with open('C:/Users/chenxi/Desktop/disanzhouzuoye/第三周作业/code/data/RentListingInquries_FE_test.csv') as file:\n",
    "    test = pd.read_csv(file)\n",
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 74659 entries, 0 to 74658\n",
      "Columns: 227 entries, bathrooms to work\n",
      "dtypes: float64(9), int64(218)\n",
      "memory usage: 129.3 MB\n"
     ]
    }
   ],
   "source": [
    "# Summarize the test frame: entry count, column count, dtypes and memory usage\n",
    "test.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the test frame as the feature matrix as-is (this binds a second name, it does not copy)\n",
    "X_test = test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the trained model from disk\n",
    "# NOTE(review): this rebinds `xgb`, hiding the xgboost module alias imported at the top;\n",
    "# the name is kept because the prediction cell that follows calls xgb.predict_proba.\n",
    "# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.\n",
    "with open(\"xgb_model.pkl\", 'rb') as model_file:\n",
    "    xgb = pickle.load(model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict class probabilities for the test set and save the result\n",
    "y_test_pred = xgb.predict_proba(X_test)\n",
    "\n",
    "# NOTE(review): assumes the probability columns come back in the order high, medium, low\n",
    "# - confirm against how interest_level was encoded for training.\n",
    "# Column names are passed to the constructor; the former single-frame concat was a redundant copy.\n",
    "out_df = pd.DataFrame(y_test_pred, columns=[\"high\", \"medium\", \"low\"])\n",
    "\n",
    "# NOTE(review): absolute, machine-specific output path\n",
    "out_df.to_csv(\"C:/Users/chenxi/Desktop/xgb_Rent.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
